Introduction

We investigate a dataset of 102 bulk RNA-seq samples extracted from the prefrontal cortex of 102 deceased individuals (overview). The dataset is provided by H Rehrauer of FGCZ (details). Roughly half of the samples are from patients diagnosed with ALS, the remaining ones are referred to as the control group. We attempt to infer cell type proportions hidden in each bulk RNA-seq sample. This procedure, known as cell type deconvolution (overview), should be integral to inference from the sample to disease condition. We focus here on reference-based deconvolution: in essence, if \(X\) is a bulk RNA-seq count vector, and \(a, b, \ldots\) are single-cell RNA-seq count vectors of representatives of relevant cells types, one aims to find non-negative coefficients \(\alpha, \beta, \ldots\) such that \(X \approx \alpha a + \beta b + \ldots\). Unfortunately, this reconstruction is confounded by batch effects such as difference in sequencing platforms, in particular different statistical properties of the technical noise of bulk versus single-cell RNA-seq.

As cell type reference datasets we resort to single-cell RNA-seq dataset Darmanis et al (~400 cells) and single-nucleus RNA-seq dataset Allen Brain M1 (>70k cells). The two are compared here.

(in progress)

Preliminaries

Setup

suppressPackageStartupMessages({
  requireNamespace("magrittr")
  library(dplyr)
  library(ggplot2)

  requireNamespace("pheatmap")
  requireNamespace("stringr")
  requireNamespace("reshape2")

  # avoid importing `exprs` that leads to clashes
  requireNamespace("rlang")

  # https://vroom.r-lib.org/articles/vroom.html
  # install.packages("vroom")
  requireNamespace("vroom")

  # install.packages("pathlibr")
  requireNamespace("pathlibr")

  # Provides `ExpressionSet` data structure
  # BiocManager::install("Biobase")
  requireNamespace("Biobase")

  # Deconvolution package
  # install.packages("BisqueRNA")
  requireNamespace("BisqueRNA")

  # # https://xuranw.github.io/MuSiC/articles/MuSiC.html
  # install.packages("devtools")
  # devtools::install_github("xuranw/MuSiC")
  requireNamespace("xbioc") # For MuSiC
  requireNamespace("MuSiC")

  # install.packages("kableExtra")
  requireNamespace("kableExtra")
})

Session info

## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.1 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8    LC_NUMERIC=C            LC_TIME=lzh_TW         
##  [4] LC_COLLATE=en_US.UTF-8  LC_MONETARY=lzh_TW      LC_MESSAGES=en_US.UTF-8
##  [7] LC_PAPER=lzh_TW         LC_NAME=C               LC_ADDRESS=C           
## [10] LC_TELEPHONE=C          LC_MEASUREMENT=lzh_TW   LC_IDENTIFICATION=C    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggplot2_3.3.2 dplyr_1.0.2  
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.5           lattice_0.20-40      assertthat_0.2.1    
##  [4] digest_0.6.27        R6_2.5.0             pathlibr_0.1.0      
##  [7] plyr_1.8.6           nnls_1.4             backports_1.2.1     
## [10] MatrixModels_0.4-1   stats4_3.6.3         RSQLite_2.2.1       
## [13] evaluate_0.14        coda_0.19-4          httr_1.4.2          
## [16] pillar_1.4.7         rlang_0.4.9          rstudioapi_0.13     
## [19] SparseM_1.78         blob_1.2.1           S4Vectors_0.24.4    
## [22] Matrix_1.2-18        checkmate_2.0.0      rmarkdown_2.6       
## [25] webshot_0.5.2        stringr_1.4.0        pheatmap_1.0.12     
## [28] bit_4.0.4            munsell_0.5.0        compiler_3.6.3      
## [31] xfun_0.19            pkgconfig_2.0.3      BiocGenerics_0.32.0 
## [34] pkgmaker_0.32.2      mcmc_0.9-7           htmltools_0.5.0     
## [37] tidyselect_1.1.0     tibble_3.0.4         matrixStats_0.57.0  
## [40] IRanges_2.20.2       codetools_0.2-16     viridisLite_0.3.0   
## [43] crayon_1.3.4         conquer_1.0.2        withr_2.3.0         
## [46] MASS_7.3-51.5        MuSiC_0.1.1          grid_3.6.3          
## [49] xtable_1.8-4         gtable_0.3.0         lifecycle_0.2.0     
## [52] registry_0.5-1       DBI_1.1.0            magrittr_2.0.1      
## [55] scales_1.1.1         stringi_1.5.3        vroom_1.3.2         
## [58] reshape2_1.4.4       xbioc_0.1.19         xml2_1.3.2          
## [61] ellipsis_0.3.1       generics_0.1.0       vctrs_0.3.6         
## [64] kableExtra_1.3.1     RColorBrewer_1.1-2   tools_3.6.3         
## [67] bit64_4.0.5          Biobase_2.46.0       glue_1.4.2          
## [70] purrr_0.3.4          parallel_3.6.3       yaml_2.2.1          
## [73] AnnotationDbi_1.48.0 colorspace_2.0-0     BiocManager_1.30.10 
## [76] rvest_0.3.6          BisqueRNA_1.0.4      memoise_1.1.0       
## [79] knitr_1.30           quantreg_5.75        MCMCpack_1.4-9

Paths

BASEPATH <- pathlibr::Path$new(".")
stopifnot("deconvolution.Rmd" %in% names(BASEPATH$dir))
out_file <- (function(.) BASEPATH$join("output")$join(.)$show)
dir.create(out_file(""), showWarnings = FALSE)
path_to <- (function(.) Sys.glob(BASEPATH$join("../../..")$join(.)$show))

Random-seed

set.seed(43)

Row names, etc.

# Use first column as index
by_col1 <- (function(.) tibble::column_to_rownames(., colnames(.)[1]))
# Use index as new column `name`
ind2col <- (function(., name) tibble::rownames_to_column(., var = name))
group_by_and_sum <-
  function(., column_name) {
    (.) %>%
      dplyr::group_by(!!rlang::sym(column_name)) %>%
      dplyr::summarise_all(sum) %>%
      tibble::column_to_rownames(column_name)
  }
fix_names <- function(.) {
  (.) %>%
    # https://stackoverflow.com/a/55184433
    rlang::set_names(stringr::str_replace(names(.), "cell type", "celltype"))
}
col_norm2 <- (function(.) t(t(.) / sqrt(colSums((.)**2))))

File I/O

The function utils::read.delim is too slow to read wide tables (Allen Brain M1 has >70k cells).

Sys.setenv("VROOM_CONNECTION_SIZE" = 131072 * 32)
from_csv <- function(.) {
  vroom::vroom(., del = "\t") %>%
    by_col1() %>%
    suppressMessages()
}

A call to to_csv should be preceded by ind2col("name for index") to write the row names.

to_csv <- function(., f) {
  vroom::vroom_write(., out_file(f), delim = "\t")
}

Plotting

ggplot2::theme_set(theme_light(base_size = 15))
kable <- function(.) {
  kableExtra::kbl(., align = "c") %>%
    kableExtra::kable_paper("hover", full_width = F)
}
# Wide plots (inches?)
WIDTH1 <- 20
HEIGHT1 <- 3
hist_theme <-
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_blank(),
  )
save_barchart <-
  function(., filename) {
    as.data.frame(.) %>%
      ind2col("celltype") %>%
      reshape2::melt(
        id = "celltype",
        var = "sample",
        value.name = "y"
      ) %>%
      {
        ggplot(., aes(x = sample, y = y, fill = celltype)) +
          geom_bar(stat = "identity") +
          scale_fill_brewer(palette = "Paired") +
          ylim(0, 1) +
          hist_theme +
          theme(
            legend.title = element_blank(),

            axis.text.y = element_blank(),

            axis.title.x = element_blank(),
            axis.text.x = element_text(
              a = -90,
              vjust = 0.5,
              hjust = 0,
              size = 10,
            )
          ) +
          ggsave(
            filename = out_file(filename),
            width = WIDTH1,
            height = HEIGHT1,
            device = "png"
          )
      } %>%
      suppressWarnings()
    return(.)
  }
save_heatmap <-
  function(., filename) {
    as.data.frame(.) %>%
      pheatmap::pheatmap(
        filename = out_file(filename),
        cluster_rows = FALSE,
        cluster_cols = FALSE,
        fontsize_row = 14,
        fontsize_col = 10,
        width = WIDTH1,
        height = HEIGHT1
      )
    return(.)
  }

Data sources

Bulk expression: FGCZ

fgcz_data <-
  path_to("data/20201128-FGCZ/*count.zip") %>%
  unz(., unzip(., list = TRUE)$Name) %>%
  from_csv() %>%
  # Collapse ENSG IDs by gene_name:
  group_by_and_sum("gene_name")

fgcz_meta <- from_csv(path_to("data/20201128-FGCZ/*infos.tsv"))

This will be used later.

assert_fgcz_meta_order <-
  function(.) {
    stopifnot(all(rownames(.) == rownames(fgcz_meta)))
    return(.)
  }
t(fgcz_data) %>%
  assert_fgcz_meta_order() %>%
  invisible()

Ref snRNA: Darmanis

darm_data <- from_csv(path_to("data/*/2015-Darmanis/b*/data.csv.gz"))
darm_meta <- from_csv(path_to("data/*/2015-Darmanis/b*/meta.csv.gz")) %>%
  fix_names()
exclude_celltypes <- c("fetal_quiescent", "fetal_replicating", "hybrid")

Plot cell type counts.

darm_meta$celltype %>%
  data.frame(x = .) %>%
  ggplot(aes(x = x, fill = if_else(x %in% exclude_celltypes, "drop", "keep"))) +
  geom_bar() +
  ggtitle("Cell types in the 'Darmanis' reference dataset") +
  scale_y_log10() +
  hist_theme +
  theme(axis.title.x = element_blank()) +
  theme(axis.text.x = element_text(a = 45, hjust = 1)) +
  theme(legend.title = element_blank())

Remove unnecessary cell types from the scRNA reference dataset.

darm_meta <-
  darm_meta %>%
  dplyr::filter(!(celltype %in% exclude_celltypes))

darm_data <-
  darm_data %>%
  dplyr::select_if(names(.) %in% rownames(darm_meta))

stopifnot(285 == nrow(darm_meta))
stopifnot(285 == ncol(darm_data))

Ref scRNA: Allen Brain M1

abm1_meta <- from_csv(path_to("data/*/2019-AllenBrain-M1/b*/meta.csv*"))
abm1_data <- from_csv(path_to("data/*/2019-AllenBrain-M1/b*/data.csv*"))

Some sanity checks.

# Number of single cells
stopifnot(ncol(abm1_data) > 70000)
stopifnot(nrow(abm1_meta) > 70000)
# Number of genes and some examples
stopifnot(nrow(abm1_data) == 141)
stopifnot(all(c("PIK3CD", "WNT4", "LDLRAP1") %in% rownames(abm1_data)))

Drop samples with too little expression, otherwise Bisque doesn’t go through.

abm1_data <-
  abm1_data %>%
  # Keep only genes common with the FGCZ dataset
  {
    (.)[rownames(.) %in% rownames(fgcz_data), ]
  } %>%
  # Drop zero columns (dplyr variant too slow)
  {
    (.)[, colSums(.) != 0]
  }

abm1_meta <-
  abm1_meta %>%
  dplyr::filter(rownames(.) %in% colnames(abm1_data))

Plot cell type counts.

data.frame(
  x = abm1_meta$celltype,
  donor = abm1_meta$donor
) %>%
  ggplot(aes(x = x, fill = donor)) +
  geom_bar(position = "dodge") +
  ggtitle("Cell types in the 'Allen Brain M1' reference dataset") +
  scale_fill_brewer(palette = "Set1") +
  scale_y_log10() +
  hist_theme +
  theme(axis.title.x = element_blank()) +
  theme(axis.title.y = element_blank())

Marker genes

These marker genes were extracted from the Allen Brain M1 scRNA dataset. Technically, these should be the same as in abm1_data before filtering. Just in case, we subset them the maximal common subset.

abm1_markergenes <-
  path_to("data/*/*AllenBrain-M1/b*/marker_genes.csv") %>%
  from_csv() %>%
  rownames() %>%
  {
    (.)[(.) %in% rownames(fgcz_data)]
  } %>%
  {
    (.)[(.) %in% rownames(abm1_data)]
  }

stopifnot(117 == length(abm1_markergenes))

Deconvolution

From Darmanis

In this section we use Darmanis et al as the reference dataset.

Bisque

Repackage the data following the Bisque vignette.

fgcz_eset <- Biobase::ExpressionSet(
  # Subset to marker genes for residual norm computation
  assayData = as.matrix(fgcz_data[abm1_markergenes, ])
)
darm_eset <- Biobase::ExpressionSet(
  # Expression data
  assayData = as.matrix(darm_data),
  # Metadata
  phenoData = Biobase::AnnotatedDataFrame(
    data = data.frame(
      row.names = rownames(darm_meta),
      cellType = darm_meta$celltype,
      SubjectName = darm_meta$experiment_sample_name,
      check.names = FALSE,
      check.rows = FALSE,
      stringsAsFactors = FALSE
    ),
    varMetadata = data.frame(
      row.names = c("cellType", "SubjectName"),
      labelDescription = c("cellType", "SubjectName")
    )
  )
)

Deconvolution.

bisque_report <-
  BisqueRNA::ReferenceBasedDecomposition(
    # BULK DATA
    bulk.eset = fgcz_eset,
    # REFERENCE
    sc.eset = darm_eset,
    #
    use.overlap = FALSE,
    markers = abm1_markergenes,
    verbose = FALSE
  )
# Fields of the Bisque result
bisque_report %>%
  summary() %>%
  kable()
Length Class Mode
bulk.props 612 -none- numeric
sc.props 48 -none- numeric
rnorm 102 -none- numeric
genes.used 109 -none- character
transformed.bulk 11118 -none- numeric

Bisque returns proportions that sum to one:

stopifnot(max(abs(1 - colSums(bisque_report$bulk.props))) <= 1e-10)

We undo that using the residual. The meaning of rnorm is not clear from the manual, so this might be incorrect:

explained_fractions <-
  bisque_report %>% {
    1 - (sqrt((.)$rnorm) / sqrt(colSums(fgcz_data[(.)$genes.used, ]**2)))
  }

Rescale proportions to the “explained fractions”.

fgcz_darm_bisque <-
  t(t(bisque_report$bulk.props) * explained_fractions)

Cluster bulk samples by composition.

samples_order <-
  fgcz_darm_bisque %>%
  {
    (.)[, hclust(dist(t(.)))$order]
  } %>%
  colnames()

Save to disk and visualize inferred cell type composition by bulk sample.

visualize_and_save <-
  function(., prefix = deparse(substitute(.))) {
    as <- {
      function(ext) paste(prefix, ext, sep = "_")
    }

    (.)[, samples_order] %>%
      #
      save_heatmap(as("heat.png")) %>%
      save_barchart(as("bars.png")) %>%
      #
      as.data.frame() %>%
      ind2col("celltype") %>%
      to_csv(as("data.csv"))
  }
visualize_and_save(fgcz_darm_bisque)

The cell type proportions returned by Bisque vary rather more than expected. The samples come from four different hospitals (the field fgcz_meta$Source), which could introduce the strongest batch effect. We can also compare that to the RIN (RNA integrity number). Age could be another important factor.

PC1_versus <-
  function(., on_the_y_axis) {
    prcomp(t(.))$x %>%
      as.data.frame() %>%
      assert_fgcz_meta_order() %>%
      cbind(fgcz_meta) %>%
      cbind(q = (2 * rank((.)$PC1) / length((.)$PC1) - 1)) %>%
      cbind(y = (.)[[on_the_y_axis]]) %>%
      {
        ggplot(., aes(x = q, y = y, shape = Condition, color = Source)) +
          labs(
            x = "Deviation from the average composition (~PC1)",
            y = on_the_y_axis,
            color = "Sample source",
            shape = "Condition"
          ) +
          geom_point(size = 5, alpha = 0.2 + abs((.)$q)) +
          scale_color_brewer(palette = "Set1")
      }
  }

Principal components of cell type proportions.

fgcz_darm_bisque %>%
  PC1_versus("PC2")

fgcz_darm_bisque %>%
  PC1_versus("RIN")

fgcz_darm_bisque %>%
  PC1_versus("Age")

MuSiC

# Prevent errors from MuSiC
exprs <- xbioc::exprs
pVar <- xbioc::pVar

music_report.darm <-
  MuSiC::music_prop(
    bulk.eset = fgcz_eset,
    sc.eset = darm_eset,
    clusters = darm_meta$celltype,
    samples = names(darm_data)
  ) %>%
  suppressMessages()
music_report.darm %>%
  summary() %>%
  kable()
Length Class Mode
Est.prop.weighted 612 -none- numeric
Est.prop.allgene 612 -none- numeric
Weight.gene 11118 -none- numeric
r.squared.full 102 -none- numeric
Var.prop 612 -none- numeric

Estimate cell type proportions (celltype x sample).

fgcz_darm_music <-
  t(music_report.darm$Est.prop.weighted) %>% {
    (.)[rownames(fgcz_darm_bisque), ]
  }
visualize_and_save(fgcz_darm_music)

Principal components of cell type proportions.

fgcz_darm_music %>%
  PC1_versus("PC2")

fgcz_darm_music %>%
  PC1_versus("RIN")

fgcz_darm_music %>%
  PC1_versus("Age")

We would like to know whether there is the cell type composition differs in ALS vs Control samples. First, the following figure shows the distribution of the first principal component (of cell type proportions) separated by sample source.

fgcz_darm_music.pca <-
  fgcz_darm_music %>%
  {
    as.data.frame(prcomp(t(.))$x)
  } %>%
  assert_fgcz_meta_order() %>%
  cbind(fgcz_meta)
fgcz_darm_music.pca %>% {
  ggplot(., aes(x = PC1, fill = Source, linetype = Source)) +
    labs(fill = "Sample source") +
    guides(linetype = FALSE) +
    geom_density(
      position = "identity", alpha = 0.4
    ) +
    geom_histogram(
      aes(y = stat(density) / 5),
      position = "dodge", bins = 20, color = 1, linetype = 1
    ) +
    hist_theme +
    theme(axis.text.y = element_blank()) +
    theme(
      panel.grid.major.y = element_blank(),
      panel.grid.minor.y = element_blank(),
    ) +
    scale_fill_brewer(palette = "Set1")
}

The separation by Source seen in the above figure suggests looking at the Condition (ALS vs Control) for each Source individually.

fgcz_darm_music.pca %>%
  {
    ggplot(., aes(x = PC1, fill = Condition)) +
      geom_density(
        position = "identity", alpha = 0.4
      ) +
      geom_histogram(
        aes(y = stat(density) / 5),
        position = "dodge", bins = 20, color = 1, linetype = 1
      ) +
      hist_theme +
      theme(
        axis.text.x = element_blank(),
        axis.text.y = element_blank()
      ) +
      scale_fill_brewer(palette = "Set1") +
      facet_grid(cols = vars(Source))
  } %>%
  ggsave(
    filename = out_file("fgcz_darm_music_PC1.png"),
    width = WIDTH1,
    height = WIDTH1 / 4
  ) %>%
  invisible()

Among those, samples from Lo show a conspicuous further separation by Condition, supported by the following two-sided two-sample t-test.

fgcz_darm_music.pca %>%
  filter(Source != "PD") %>%
  group_by(Source) %>%
  group_map(~ {
    t.test(
      x = filter((.x), Condition == "ALS")$PC1,
      y = filter((.x), Condition != "ALS")$PC1,
      alternative = "two"
    ) %>% {
      list(
        `Sample source` = dplyr::pull((.y)),
        `p-value` = round((.)$p.value, 3),
        `t-statistic` = round((.)$statistic, 2),
        `#dof` = round((.)$parameter, 1)
      )
    }
  }) %>%
  data.table::rbindlist() %>%
  kable()
Sample source p-value t-statistic #dof
Lo 0.023 -2.48 18.2
NB 0.672 -0.43 17.0
Ox 0.616 -0.51 21.6

Baseline NNLS

For the code see here.

fgcz_darm_nnls <-
  path_to("code/*/*-NNLS_Darmanis/a_*/celltypes.csv") %>%
  # The first column is not unique
  # Keep header as is, e.g. "H-0C083"
  utils::read.delim(., check.names = F) %>%
  # `cell type` ~> `celltype`
  fix_names() %>%
  # Collapse cell types:
  group_by_and_sum("celltype")
visualize_and_save(fgcz_darm_nnls)

fgcz_darm_nnls %>%
  PC1_versus("PC2")

fgcz_darm_nnls %>%
  PC1_versus("RIN")

fgcz_darm_nnls %>%
  PC1_versus("Age")

These results are similar enough to MuSiC’s to warrant a closer look. For each sample we compute the cosine similarity between the two methods and plot a histogram of those separated by sample source.

# Check that samples are arranged consistently
stopifnot(all(colnames(fgcz_darm_music) == colnames(fgcz_darm_nnls)))
stopifnot(all(rownames(fgcz_darm_music) == rownames(fgcz_darm_nnls)))

colSums(col_norm2(fgcz_darm_music) * col_norm2(fgcz_darm_nnls)) %>%
  data.frame(
    x = .,
    s = fgcz_meta$Source
  ) %>%
  {
    ggplot(., aes(x = x, fill = s, linetype = s)) +
      labs(fill = "Sample source") +
      labs(x = "Cosine similarity of a sample (NNLS vs MuSiC)") +
      guides(linetype = FALSE) +
      geom_density(
        position = "identity", alpha = 0.4
      ) +
      geom_histogram(
        aes(y = stat(density) / 5),
        position = "dodge", bins = 30, color = 1, linetype = 1,
      ) +
      hist_theme +
      theme(axis.text.y = element_blank()) +
      theme(
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
      ) +
      scale_fill_brewer(palette = "Set1")
  }

From Allen Brain M1

In this section we use Allen Brain M1 as the reference dataset.

Bisque

Repackage:

abm1_eset <- Biobase::ExpressionSet(
  # Expression data
  assayData = as.matrix(abm1_data),
  # Metadata
  phenoData = Biobase::AnnotatedDataFrame(
    data = data.frame(
      row.names = rownames(abm1_meta),
      cellType = abm1_meta$celltype,
      SubjectName = abm1_meta$donor,
      check.names = FALSE,
      check.rows = FALSE
    ),
    varMetadata = data.frame(
      row.names = c("cellType", "SubjectName"),
      labelDescription = c("cellType", "SubjectName")
    )
  )
)

Deconvolution:

bisque_report <-
  BisqueRNA::ReferenceBasedDecomposition(
    # BULK DATA
    bulk.eset = fgcz_eset,
    # REFERENCE
    sc.eset = abm1_eset,
    #
    use.overlap = FALSE,
    markers = abm1_markergenes,
    verbose = FALSE
  )
## Warning in BisqueRNA::ReferenceBasedDecomposition(bulk.eset = fgcz_eset, :
## Only two individuals detected in single-cell data. While Bisque will run, we
## recommend at least three subjects for reliable performance.
bisque_report$bulk.props %>%
  visualize_and_save("fgcz_abm1_bisque")

MuSiC

# Read from disk if already available
# otherwise it takes ages
if (!file.exists(out_file("fgcz_abm1_music_data.csv"))) {
  # MuSiC with ABM1
  music_report.abm1 <-
    MuSiC::music_prop(
      bulk.eset = fgcz_eset,
      sc.eset = abm1_eset,
      clusters = abm1_meta$celltype,
      samples = names(abm1_data)
    ) %>%
    suppressMessages()

  fgcz_abm1_music <-
    t(music_report.abm1$Est.prop.weighted)
  
  fgcz_abm1_music %>%
    visualize_and_save("fgcz_abm1_music")
} else {
  fgcz_abm1_music <- 
    from_csv(out_file("fgcz_abm1_music_data.csv"))
}

Discussion

(in progress)

Acknowledgements

(in progress)

Cite as

RA, Cell type deconvolution from bulk RNA-seq of the human brain, 2020-12-27, http://bit.ly/deco_ra.